


import os
import sys

import pandas as pd
import statsmodels.formula.api as smf

# Output directory for the generated table. It is normally passed as the first
# command-line argument; when no argument is given, fall back to the default
# Overleaf location below.
try:
    overleaf_path = sys.argv[1]
except IndexError:
    # Only a missing argument is expected here; anything else should surface.
    overleaf_path = './../Apps/Overleaf/bbm/draft/reports/revision/'

#%% IMPORT DATA -------------------------------------------------------------------------
# Load the processed contracts table; the first CSV column becomes the index.
df = pd.read_csv('./data_py/processed/contracts_final.csv', index_col=[0])

# Parse the fixture date once and derive calendar year / month columns from it.
fixture_dates = pd.to_datetime(df['fixture_date'])
df['fixture_date'] = fixture_dates
df['year'] = fixture_dates.dt.year
df['month'] = fixture_dates.dt.month

# De-mean state variable controls so that rig-type indicators are interpretable
demeaned_columns = ['g', 'n_l', 'n_m', 'n_h', 'mri']
for column in demeaned_columns:
    df[column] = df[column].sub(df[column].mean())

# Indicator for observations whose (de-meaned) mri lies strictly above its own
# 90th percentile.
mri_cutoff = df['mri'].quantile(0.9)
df['high_mri'] = df['mri'].gt(mri_cutoff)

# Intentional no-op: a constant shift of gas by 6.66 used to be applied here
# and is currently disabled.
df['gas'] = df['gas']
#%% SET FORMULAS FOR REGRESSIONS --------------------------------------------------------
# Categorical terms reused across all specifications (treatment-coded, with
# 'low' spec and high_mri == False as the reference levels).
_SPEC = "C(spec, Treatment(reference='low'))"
_HIGH_MRI = "C(high_mri, Treatment(reference=False))"

# Right-hand-side terms common to every regression. The spacing between the
# pieces is kept exactly as is, because the formula strings are passed to the
# formula parser verbatim.
_COMMON_TERMS = (
    f"day_rate ~ {_HIGH_MRI}:g "
    f"+ {_SPEC}:{_HIGH_MRI}"
    f"+ {_SPEC}:g"
    f"+ {_SPEC}:{_HIGH_MRI}:g"
)

# Specification-specific tails; every specification drops the intercept (-1).
#   0: common terms only
#   1: adds the gas:value interaction
#   2: adds the gas:value interaction and tau
_SPEC_TAILS = (
    "-1",
    "+ gas : value -1",
    "+ gas : value + tau -1",
)

# Regression index -> full formula string.
formulas_by_reg = {
    index: _COMMON_TERMS + tail for index, tail in enumerate(_SPEC_TAILS)
}

# Placeholder name -> value substituted into the LaTeX template later on.
summary_to_tex = {}

# Fitted-parameter name -> short name used to build the template placeholders.
coefs_to_tex_names = {
    f"{_SPEC}[T.high]:{_HIGH_MRI}[T.True]:g": 'delta',
}

#%% DO REGRESSIONS ----------------------------------------------------------------------
def _significance_stars(p_value):
    """Return the star suffix for a p-value.

    '***' for p < 0.01, '**' for p < 0.05, '*' for p < 0.1, otherwise ''.
    A NaN p-value fails every comparison and therefore gets no stars.
    """
    if p_value < 0.01:
        return "***"
    if p_value < 0.05:
        return "**"
    if p_value < 0.1:
        return "*"
    return ""


# Fit every specification by OLS with an HC0 robust covariance and collect the
# values that the LaTeX template needs into summary_to_tex.
for i in formulas_by_reg:
    reg = smf.ols(
        formula=formulas_by_reg[i],
        data=df
    ).fit(cov_type='HC0')

    summary_to_tex[f"r_{i}"] = round(reg.rsquared, 2)
    summary_to_tex[f"n_{i}"] = int(reg.nobs)

    for k in coefs_to_tex_names:
        try:
            coef = reg.params[k]
            std_err = reg.HC0_se[k]
            p_value = reg.pvalues[k]
        except KeyError:
            # The coefficient is not part of this regression: leave its
            # entries out of the table values. Any other error propagates.
            continue

        tex_name = coefs_to_tex_names[k]
        # Coefficient and standard error are reported scaled by 1000; the
        # coefficient additionally carries its significance stars.
        summary_to_tex[f"{tex_name}_{i}"] = (
            str(round(1000 * coef, 2)) + _significance_stars(p_value)
        )
        summary_to_tex[f"se_{tex_name}_{i}"] = str(round(1000 * std_err, 2))

#%% PRODUCE THE TABLE -------------------------------------------------------------------
# Read the LaTeX template and fill its named str.format placeholders with the
# collected regression values.
with open('./src/tex/table_synergies.tex', 'r') as f:
    tex = f.read()
output = tex.format(**summary_to_tex)

# Build the destination with os.path.join so the output directory works whether
# or not it was given with a trailing path separator.
table_path = os.path.join(overleaf_path, 'tables', 'table_synergies.tex')
with open(table_path, 'w') as f:
    f.write(output)
